{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3GvsqHtXQvvX"
   },
   "source": [
    "# Initial Setups\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "MTrRCzgmFwYn"
   },
   "source": [
    "## (Google Colab use only)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "xC1S4mWFFv5U"
   },
   "outputs": [],
   "source": [
    "# Use Google Colab\n",
    "use_colab = True\n",
    "\n",
    "# Is this notebook running on Colab?\n",
    "# If so, then google.colab package (github.com/googlecolab/colabtools)\n",
    "# should be available in this environment\n",
    "\n",
    "# Previous version used importlib, but we could do the same thing with\n",
    "# just attempting to import google.colab\n",
    "try:\n",
    "    from google.colab import drive\n",
    "    colab_available = True\n",
    "except:\n",
    "    colab_available = False\n",
    "\n",
    "if use_colab and colab_available:\n",
    "    drive.mount('/content/drive')\n",
    "\n",
    "    # cd to the appropriate working directory under my Google Drive\n",
    "    %cd 'drive/My Drive/cs696ds_lexalytics/Language Model Finetuning'\n",
    "    \n",
    "    # Install packages specified in requirements\n",
    "    !pip install -r requirements.txt\n",
    "    \n",
    "    # List the directory contents\n",
    "    !ls"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "uZ5SZXkbEp34"
   },
   "source": [
    "## Experiment Parameters\n",
    "\n",
    "**NOTE**: The following `experiment_id` MUST BE CHANGED in order to avoid overwriting the files from other experiments!!!!!!\n",
    "\n",
    "**NOTE 2**: The values for the variables in the cell below can be overridden by `papermill` at runtime. Variables in other cells cannot be changed in this manner."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "rUqrV6VeEs3Z",
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# We will use the following string ID to identify this particular (training) experiments\n",
    "# in directory paths and other settings\n",
    "experiment_id = 'lm_finetuning_bert_nli'\n",
    "\n",
    "# Random seed\n",
    "random_seed = 696\n",
    "\n",
    "# Dataset size related\n",
    "total_subset_proportion = 1.0 # Do we want to use the entirety of the training set, or some parts of it?\n",
    "validation_dataset_proportion = 0.1 # Proportion to be reserved for validation (after selecting random subset with total_subset_proportion)\n",
    "\n",
    "# Training hyperparameters\n",
    "num_train_epochs = 20 # Number of epochs\n",
    "per_device_train_batch_size = 16 # training batch size PER COMPUTE DEVICE\n",
    "per_device_eval_batch_size = 16 # evaluation batch size PER COMPUTE DEVICE\n",
    "learning_rate = 1e-5\n",
    "weight_decay = 0.01"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "kSItEk35-R8o"
   },
   "source": [
    "## Package Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "o-jSRWQfLL4U"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Python version: 3.8.5 (default, Sep  3 2020, 21:29:08) [MSC v.1916 64 bit (AMD64)]\n",
      "NumPy version: 1.19.2\n",
      "PyTorch version: 1.7.1\n",
      "Transformers version: 4.4.0.dev0\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "import random\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "import transformers\n",
    "import datasets\n",
    "\n",
    "import utils\n",
    "\n",
    "# Random seed settings\n",
    "random.seed(random_seed)\n",
    "np.random.seed(random_seed)\n",
    "torch.manual_seed(random_seed)\n",
    "\n",
    "# Print version information\n",
    "print(\"Python version: \" + sys.version)\n",
    "print(\"NumPy version: \" + np.__version__)\n",
    "print(\"PyTorch version: \" + torch.__version__)\n",
    "print(\"Transformers version: \" + transformers.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rkKDoXUp-UIi"
   },
   "source": [
    "## PyTorch GPU settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "je9BT2pQIpUx"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "PyTorch device selected: cpu\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Mrinal\\anaconda3\\lib\\site-packages\\torch\\cuda\\__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at  ..\\c10\\cuda\\CUDAFunctions.cpp:100.)\n",
      "  return torch._C._cuda_getDeviceCount() > 0\n"
     ]
    }
   ],
   "source": [
    "if torch.cuda.is_available():    \n",
    "    torch_device = torch.device('cuda')\n",
    "\n",
    "    # Set this to True to make your output immediately reproducible\n",
    "    # Note: https://pytorch.org/docs/stable/notes/randomness.html\n",
    "    torch.backends.cudnn.deterministic = False\n",
    "    \n",
    "    # Disable 'benchmark' mode: Set this False if you want to measure running times more fairly\n",
    "    # Note: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936\n",
    "    torch.backends.cudnn.benchmark = True\n",
    "    \n",
    "    # Faster Host to GPU copies with page-locked memory\n",
    "    use_pin_memory = True\n",
    "    \n",
    "    # Number of compute devices to be used for training\n",
    "    training_device_count = torch.cuda.device_count()\n",
    "\n",
    "    # CUDA libraries version information\n",
    "    print(\"CUDA Version: \" + str(torch.version.cuda))\n",
    "    print(\"cuDNN Version: \" + str(torch.backends.cudnn.version()))\n",
    "    print(\"CUDA Device Name: \" + str(torch.cuda.get_device_name()))\n",
    "    print(\"CUDA Capabilities: \"+ str(torch.cuda.get_device_capability()))\n",
    "    print(\"Number of CUDA devices: \"+ str(training_device_count))\n",
    "    \n",
    "else:\n",
    "    torch_device = torch.device('cpu')\n",
    "    use_pin_memory = False\n",
    "    \n",
    "    # Number of compute devices to be used for training\n",
    "    training_device_count = 1\n",
    "\n",
    "print()\n",
    "print(\"PyTorch device selected:\", torch_device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "H3txs5s7Q1UG"
   },
   "source": [
    "# Further pre-training"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "gEnUsBDUOLAm"
   },
   "source": [
    "## Load the BERT-base-uncased-MNLI model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "id": "W3TCqS-3OOIj"
   },
   "outputs": [],
   "source": [
    "tokenizer = transformers.AutoTokenizer.from_pretrained(\"textattack/bert-base-uncased-MNLI\", cache_dir='./bert_base_uncased-MNLI')\n",
    "model = transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/bert-base-uncased-MNLI\", cache_dir='./bert_base_cache')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PrLTLsRFRUKK"
   },
   "source": [
    "## Load the Yelp dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "yelp = datasets.load_dataset(\n",
    "    './dataset_scripts/yelp_restaurants/yelp_NLI.py',\n",
    "    data_files={\n",
    "        'train': 'dataset_files/yelp_restaurants/yelp_academic_dataset_review.json',\n",
    "        'restaurant_ids': 'dataset_files/yelp_restaurants/restaurantIDs.txt',\n",
    "        'pos_sentiment': 'dataset_files/opinion_lexicon/positive-words.txt',\n",
    "        'neg_sentiment': 'dataset_files/opinion_lexicon/negative-words.txt'\n",
    "    },\n",
    "    cache_dir='./dataset_cache')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "2m9wZvndVCvy"
   },
   "outputs": [],
   "source": [
    "data_train = yelp['train']\n",
    "print(\"Number of training data (original):\", len(data_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WrB-ZoTz5-zb"
   },
   "outputs": [],
   "source": [
    "data_train_selected = data_train.shuffle(seed=random_seed).select(np.arange(0, int(len(data_train) * total_subset_proportion)))\n",
    "print(\"Number of training data (subset):\", len(data_train_selected))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "sQ_RaowTYC9X"
   },
   "outputs": [],
   "source": [
    "# Check out how individual data points look like\n",
    "print(data_train_selected[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WqyEwcDGSJjs"
   },
   "source": [
    "### Preprocessing: Encode the text with Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "tTVxGSY4oe7e"
   },
   "outputs": [],
   "source": [
    "train_dataset = data_train_selected.map(\n",
    "    lambda e: tokenizer(e['premise'], e['hypothesis'], truncation=True),\n",
    "    batched=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train-validation split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training set size after validation split\n",
    "new_train_dataset_size = int(len(train_dataset) * (1 - validation_dataset_proportion))\n",
    "new_valid_dataset_size = len(train_dataset) - new_train_dataset_size\n",
    "\n",
    "new_train_dataset = train_dataset.select(indices=np.arange(new_train_dataset_size))\n",
    "new_valid_dataset = train_dataset.select(indices=np.arange(new_train_dataset_size, new_train_dataset_size + new_valid_dataset_size))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Training dataset after split:\", len(new_train_dataset))\n",
    "print(\"Validation dataset after split:\", len(new_valid_dataset))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "p1N4PUdHZeIm"
   },
   "source": [
    "## Pre-train further"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PD6lfHh8mURq"
   },
   "source": [
    "### Training settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How many training steps would we have?\n",
    "approx_total_training_steps = len(new_train_dataset) // (per_device_train_batch_size * training_device_count) * num_train_epochs\n",
    "\n",
    "print(\"There will be approximately %d training steps.\" % approx_total_training_steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "id": "8ps4XzQxmTgS"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Mrinal\\anaconda3\\lib\\site-packages\\torch\\cuda\\__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at  ..\\c10\\cuda\\CUDAFunctions.cpp:100.)\n",
      "  return torch._C._cuda_getDeviceCount() > 0\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "Mixed precision training with AMP or APEX (`--fp16`) and FP16 evaluation can only be used on CUDA devices.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-6-c00e419e326c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m training_args = transformers.TrainingArguments(\n\u001b[0m\u001b[0;32m      2\u001b[0m     \u001b[0moutput_dir\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'.'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'progress'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexperiment_id\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'results'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;31m# output directory\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0moverwrite_output_dir\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m     \u001b[0mnum_train_epochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnum_train_epochs\u001b[0m\u001b[1;33m,\u001b[0m              \u001b[1;31m# total number of training epochs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m     \u001b[0mper_device_train_batch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mper_device_train_batch_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\transformers\\training_args.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, output_dir, overwrite_output_dir, do_train, do_eval, do_predict, evaluation_strategy, prediction_loss_only, per_device_train_batch_size, per_device_eval_batch_size, per_gpu_train_batch_size, per_gpu_eval_batch_size, gradient_accumulation_steps, eval_accumulation_steps, learning_rate, weight_decay, adam_beta1, adam_beta2, adam_epsilon, max_grad_norm, num_train_epochs, max_steps, lr_scheduler_type, warmup_ratio, warmup_steps, logging_dir, logging_strategy, logging_first_step, logging_steps, save_strategy, save_steps, save_total_limit, no_cuda, seed, fp16, fp16_opt_level, fp16_backend, fp16_full_eval, local_rank, tpu_num_cores, tpu_metrics_debug, debug, dataloader_drop_last, eval_steps, dataloader_num_workers, past_index, run_name, disable_tqdm, remove_unused_columns, label_names, load_best_model_at_end, metric_for_best_model, greater_is_better, ignore_data_skip, sharded_ddp, deepspeed, label_smoothing_factor, adafactor, group_by_length, report_to, ddp_find_unused_parameters, dataloader_pin_memory, skip_memory_metrics)\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\transformers\\training_args.py\u001b[0m in \u001b[0;36m__post_init__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    543\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    544\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mis_torch_available\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtype\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;34m\"cuda\"\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfp16\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfp16_full_eval\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 545\u001b[1;33m             raise ValueError(\n\u001b[0m\u001b[0;32m    546\u001b[0m                 \u001b[1;34m\"Mixed precision training with AMP or APEX (`--fp16`) and FP16 evaluation can only be used on CUDA devices.\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    547\u001b[0m             )\n",
      "\u001b[1;31mValueError\u001b[0m: Mixed precision training with AMP or APEX (`--fp16`) and FP16 evaluation can only be used on CUDA devices."
     ]
    }
   ],
   "source": [
    "training_args = transformers.TrainingArguments(\n",
    "    output_dir=os.path.join('.', 'progress', experiment_id, 'results'), # output directory\n",
    "    overwrite_output_dir=True,\n",
    "    num_train_epochs=num_train_epochs,              # total number of training epochs\n",
    "    per_device_train_batch_size=per_device_train_batch_size,\n",
    "    per_device_eval_batch_size=per_device_eval_batch_size,\n",
    "    evaluation_strategy='epoch',\n",
    "    save_strategy='epoch',\n",
    "    logging_dir=os.path.join('.', 'progress', experiment_id, 'logs'), # directory for storing logs\n",
    "    logging_first_step=True,\n",
    "    weight_decay=weight_decay,               # strength of weight decay\n",
    "    seed=random_seed,\n",
    "    learning_rate=learning_rate,\n",
    "    fp16=True,\n",
    "    fp16_backend='amp',\n",
    "    prediction_loss_only=True,\n",
    "    load_best_model_at_end=True,\n",
    "    dataloader_num_workers=training_device_count * 2,\n",
    "    dataloader_pin_memory=use_pin_memory\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RQsRp9lMZstE"
   },
   "outputs": [],
   "source": [
    "print(training_args.n_gpu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "metric = datasets.load_metric('glue', 'mnli')\n",
    "\n",
    "def compute_metrics(eval_pred):\n",
    "    predictions, labels = eval_pred\n",
    "    predictions = predictions[:, 0]\n",
    "    return metric.compute(predictions=predictions, references=labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1OQJ8IOIYHXb"
   },
   "outputs": [],
   "source": [
    "trainer = transformers.Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=new_train_dataset,\n",
    "    eval_dataset=new_valid_dataset,\n",
    "    tokenizer=tokenizer,\n",
    "    compute_metrics=compute_metrics\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ZTx5562OmXx5"
   },
   "source": [
    "### Training loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-_v3lAPvb9DK"
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "wLj4Ico8vwhO"
   },
   "source": [
    "### Save the model to the local directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WLViPwdTvvxP"
   },
   "outputs": [],
   "source": [
    "trainer.save_model(os.path.join('.', 'trained_models', experiment_id))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SIQn4r1oVJp6"
   },
   "outputs": [],
   "source": [
    "tokenizer.save_pretrained(os.path.join('.', 'trained_models', experiment_id))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RNUCURCduqYa"
   },
   "source": [
    "## LM Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "u1XN8ot3us18"
   },
   "outputs": [],
   "source": [
    "eval_results = trainer.evaluate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zKkBZEpF07Ip"
   },
   "outputs": [],
   "source": [
    "print(eval_results)\n",
    "\n",
    "perplexity = np.exp(eval_results[\"eval_loss\"])\n",
    "\n",
    "print(perplexity)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "I_bAx1rL0KFb"
   },
   "source": [
    "## Playing with my own input sentences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.04422219097614288, 0.037514906376600266, 0.9182628989219666]\n",
      "['[CLS]', 'the', 'old', 'one', 'always', 'comfort', '##ed', 'ca', \"'\", 'da', '##an', ',', 'except', 'today', '.', '[SEP]', 'ca', \"'\", 'da', '##an', 'knew', 'the', 'old', 'one', 'very', 'well', '.', '[SEP]']\n"
     ]
    }
   ],
   "source": [
    "sequence_0 = \"At the other end of Pennsylvania Avenue, people began to line up for a White House tour.\"\n",
    "sequence_1 = \"People formed a line at the end of Pennsylvania Avenue.\"\n",
    "\n",
    "sequence_0 = \"The Old One always comforted Ca'daan, except today.\"\n",
    "sequence_1 = \"Ca'daan knew the Old One very well.\"\n",
    "\n",
    "x = tokenizer.encode_plus(sequence_0, sequence_1,  truncation=True, return_tensors=\"pt\").to(torch_device)\n",
    "\n",
    "logits = model(**x)[0]\n",
    "\n",
    "entailment_results = torch.softmax(logits, dim=1).tolist()[0]\n",
    "\n",
    "print(entailment_results)\n",
    "\n",
    "example_tokens = []\n",
    "for id in x[0].ids:\n",
    "    example_tokens.append(tokenizer.convert_ids_to_tokens(id))\n",
    "\n",
    "print (example_tokens)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "lm_further_pretraining_bert_amazon_electronics.ipynb",
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
